This notebook shows example return values from the methods of the hgvs data provider, using a connection obtained from `hgvs.dataproviders.uta`.


In [1]:
import datetime
import json

from hgvs.dataproviders.uta import connect
# Data provider handle shared by every example cell below; obtained by calling
# hgvs.dataproviders.uta.connect() with no arguments.
hdp = connect()

In [2]:
def json_encoder(o):
    """Fallback serializer intended for ``json.dumps(..., default=json_encoder)``.

    ``json.dumps`` invokes the ``default`` hook only for objects it cannot
    encode natively.

    Args:
        o: The object that the JSON encoder could not serialize.

    Returns:
        str: ``o.isoformat()`` when ``o`` is a ``datetime.datetime``,
        ``datetime.date`` or ``datetime.time``.

    Raises:
        TypeError: If ``o`` is of any other type. Returning ``o`` unchanged
            (the previous behavior) makes the encoder call this hook on the
            same object again, which surfaces as a misleading
            "Circular reference detected" ValueError.
    """
    if isinstance(o, (datetime.datetime, datetime.date, datetime.time)):
        return o.isoformat()
    raise TypeError(
        "Object of type {} is not JSON serializable".format(type(o).__name__)
    )
def print_json(data):
    """Pretty-print a data provider result as indented, key-sorted JSON.

    Args:
        data: Either a single record accepted by ``dict()``, or a list of
            such records. Each record is converted with ``dict()`` before
            serialization.

    Returns:
        None. The JSON text is written to stdout via ``print``.
    """
    # isinstance rather than an exact type comparison, so list subclasses are
    # also treated as a sequence of records instead of being passed to dict().
    if isinstance(data, list):
        ddata = [dict(d) for d in data]
    else:
        ddata = dict(data)
    # Values json cannot encode natively are delegated to json_encoder.
    print(json.dumps(ddata, default=json_encoder, indent=4, sort_keys=True))


# Short alias used by the example cells.
pj = print_json

get_acs_for_protein_seq (DEPRECATED)

Uses:

  • hgvs/utils/reftranscriptdata.py

get_gene_info

Uses: Not used by hgvs


In [3]:
# Fetch the gene record for the symbol "VHL" and pretty-print it as JSON.
pj(hdp.get_gene_info("VHL"))


{
    "added": "2014-02-10T22:59:21.153414",
    "aliases": "{HRCA1,RCA1,VHL1,pVHL}",
    "descr": "von Hippel-Lindau tumor suppressor",
    "hgnc": "VHL",
    "maploc": "3p25.3",
    "summary": "von Hippel-Lindau tumor suppressor"
}

get_seq

Uses:

  • hgvs/validator.py
  • hgvs/normalizer.py
  • hgvs/utils/reftranscriptdata.py
  • hgvs/utils/context.py
  • hgvs/variantmapper.py

In [4]:
# Left as a bare expression (no pj) so the returned sequence string is shown
# directly as the cell output.
hdp.get_seq("NM_199425.2")


Out[4]:
'CCTAGAATGGGGGTGGGGTGGGGTGGGGTGGGCTGGACAGAAGAGAGGAGGAGAAGGAGGTGACTGAGGGGACTGCAGCTGGGTGGGCGGTAACCGAGGGGAGGGGAACTGGTGGCGTCCCCATCTCGCGGGGTCCGGAACGGCGACGCGCCCGCGCCCAGCTGATTGGAGCCCTTCAGGCCTCCCGCGCCCGACCGGCAGCCCAATCCTATAAAGCTTCCTCTAAGCTGGGCCCTCCGCAAACGGGATCCAGAGAGGCTCGCGCCTTGCTTGCTAAGGAACCATGACCGGCCGGGACTCGCTTTCCGACGGGCGCACTAGCAGCAGGGCGCTGGTGCCTGGCGGTTCCCCTAGGGGCTCGCGCCCCCGGGGCTTCGCCATCACGGACCTGCTGGGCTTGGAGGCCGAGCTGCCGGCGCCCGCTGGCCCAGGACAGGGATCTGGCTGCGAGGGTCCGGCAGTCGCGCCGTGCCCGGGCCCGGGGCTTGACGGCTCCAGCCTGGCGCGTGGGGCCCTACCGCTGGGACTCGGCCTCCTCTGTGGCTTCGGCACGCAGCCGCCGGCGGCCGCTCGAGCACCCTGCCTGCTCCTAGCGGACGTGCCGTTCCTGCCGCCCAGGGGCCCCGAGCCCGCTGCCCCGCTGGCTCCCAGCCGTCCGCCGCCTGCGCTCGGCCGCCAGAAGCGCAGCGACAGCGTCTCCACGTCCGATGAGGACAGCCAGTCTGAAGACAGGAATGACCTAAAGGCATCCCCCACCTTGGGCAAGAGGAAGAAGCGGCGGCACAGGACAGTTTTCACTGCTCACCAGCTGGAAGAGTTGGAGAAGGCATTCAGCGAGGCCCACTACCCTGATGTGTATGCCCGAGAAATGCTGGCTGTGAAAACTGAGCTCCCCGAAGACCGGATACAGGTGTCTGGGGTCCCTTTTCTCCGCTCCAAAGATACCACAGAGAACGTGTCATTCCCACATTCAGTGAGCCAATCAGCAGTCCCTTCTCTATAGCCAACACGCTCCCTTTGCATAGAAACTGAGGGTCCCTTAGCTGAAGGCACCACAAGAGCTTGCCCCTATGACCCTCATGCCTTTTTTCATTTTATTATTATTTAGTATCAAATCATTCTTTAAAATCACATGATAATGTGTGCTGCAACTAGAAACTATAAAGATATGTCAATGAAAAAAAAAAAAAAGAGAC'

get_tx_exons

Uses:

  • hgvs/normalizer.py
  • hgvs/alignmentmapper.py
  • hgvs/transcriptmapper.py

In [5]:
# Exon records for transcript NM_199425.2 aligned to NC_000020.10 with the
# "splign" method (these appear as tx_ac / alt_ac / alt_aln_method in the output).
pj(hdp.get_tx_exons("NM_199425.2", "NC_000020.10", "splign"))


[
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "splign",
        "alt_aseq": null,
        "alt_end_i": 25059588,
        "alt_exon_id": 2999028,
        "alt_exon_set_id": 298679,
        "alt_start_i": 25059178,
        "alt_strand": -1,
        "cigar": "410=",
        "exon_aln_id": 1148619,
        "hgnc": "VSX1",
        "ord": 2,
        "tx_ac": "NM_199425.2",
        "tx_aseq": null,
        "tx_end_i": 1196,
        "tx_exon_id": 936834,
        "tx_exon_set_id": 98390,
        "tx_start_i": 786
    },
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "splign",
        "alt_aseq": null,
        "alt_end_i": 25060150,
        "alt_exon_id": 2999027,
        "alt_exon_set_id": 298679,
        "alt_start_i": 25060071,
        "alt_strand": -1,
        "cigar": "79=",
        "exon_aln_id": 1148632,
        "hgnc": "VSX1",
        "ord": 1,
        "tx_ac": "NM_199425.2",
        "tx_aseq": null,
        "tx_end_i": 786,
        "tx_exon_id": 936833,
        "tx_exon_set_id": 98390,
        "tx_start_i": 707
    },
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "splign",
        "alt_aseq": null,
        "alt_end_i": 25063015,
        "alt_exon_id": 2999026,
        "alt_exon_set_id": 298679,
        "alt_start_i": 25062308,
        "alt_strand": -1,
        "cigar": "707=",
        "exon_aln_id": 1148596,
        "hgnc": "VSX1",
        "ord": 0,
        "tx_ac": "NM_199425.2",
        "tx_aseq": null,
        "tx_end_i": 707,
        "tx_exon_id": 936832,
        "tx_exon_set_id": 98390,
        "tx_start_i": 0
    }
]

get_tx_for_gene

Uses: Not used by hgvs (but useful)


In [6]:
# Transcript records for the gene symbol "VHL"; the output has one entry per
# (tx_ac, alt_ac, alt_aln_method) combination.
pj(hdp.get_tx_for_gene("VHL"))


[
    {
        "alt_ac": "AC_000135.1",
        "alt_aln_method": "splign",
        "cds_end_i": 855,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_000551.3"
    },
    {
        "alt_ac": "NC_000003.11",
        "alt_aln_method": "blat",
        "cds_end_i": 855,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_000551.3"
    },
    {
        "alt_ac": "NC_000003.11",
        "alt_aln_method": "splign",
        "cds_end_i": 855,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_000551.3"
    },
    {
        "alt_ac": "NC_000003.12",
        "alt_aln_method": "splign",
        "cds_end_i": 855,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_000551.3"
    },
    {
        "alt_ac": "NC_018914.2",
        "alt_aln_method": "splign",
        "cds_end_i": 855,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_000551.3"
    },
    {
        "alt_ac": "NG_008212.3",
        "alt_aln_method": "splign",
        "cds_end_i": 855,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_000551.3"
    },
    {
        "alt_ac": "AC_000135.1",
        "alt_aln_method": "splign",
        "cds_end_i": 732,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_198156.2"
    },
    {
        "alt_ac": "NC_000003.11",
        "alt_aln_method": "blat",
        "cds_end_i": 732,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_198156.2"
    },
    {
        "alt_ac": "NC_000003.11",
        "alt_aln_method": "splign",
        "cds_end_i": 732,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_198156.2"
    },
    {
        "alt_ac": "NC_000003.12",
        "alt_aln_method": "splign",
        "cds_end_i": 732,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_198156.2"
    },
    {
        "alt_ac": "NC_018914.2",
        "alt_aln_method": "splign",
        "cds_end_i": 732,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_198156.2"
    },
    {
        "alt_ac": "NC_000003.11",
        "alt_aln_method": "genebuild",
        "cds_end_i": 1482,
        "cds_start_i": 840,
        "hgnc": "VHL",
        "tx_ac": "ENST00000256474"
    },
    {
        "alt_ac": "NC_000003.11",
        "alt_aln_method": "genebuild",
        "cds_end_i": 579,
        "cds_start_i": 60,
        "hgnc": "VHL",
        "tx_ac": "ENST00000345392"
    },
    {
        "alt_ac": "NC_000003.11",
        "alt_aln_method": "genebuild",
        "cds_end_i": null,
        "cds_start_i": null,
        "hgnc": "VHL",
        "tx_ac": "ENST00000477538"
    },
    {
        "alt_ac": "NC_000003.11",
        "alt_aln_method": "splign",
        "cds_end_i": 795,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_001354723.1"
    },
    {
        "alt_ac": "NC_000003.12",
        "alt_aln_method": "splign",
        "cds_end_i": 795,
        "cds_start_i": 213,
        "hgnc": "VHL",
        "tx_ac": "NM_001354723.1"
    }
]

get_tx_for_region

Uses:

  • hgvs/assemblymapper.py

In [7]:
# Transcripts reported for region 25059178-25059588 of NC_000020.10 ("splign").
# These bounds are the alt_start_i / alt_end_i of the ord-2 exon shown in the
# get_tx_exons output earlier in this notebook.
pj(hdp.get_tx_for_region("NC_000020.10", "splign", 25059178, 25059588))


[
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "splign",
        "alt_strand": -1,
        "end_i": 25063015,
        "start_i": 25051520,
        "tx_ac": "NM_001256272.1"
    },
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "splign",
        "alt_strand": -1,
        "end_i": 25063015,
        "start_i": 25051520,
        "tx_ac": "NR_045948.1"
    },
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "splign",
        "alt_strand": -1,
        "end_i": 25063015,
        "start_i": 25056071,
        "tx_ac": "NM_014588.5"
    },
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "splign",
        "alt_strand": -1,
        "end_i": 25063015,
        "start_i": 25051520,
        "tx_ac": "NR_045951.1"
    },
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "splign",
        "alt_strand": -1,
        "end_i": 25063015,
        "start_i": 25051520,
        "tx_ac": "NM_001256271.1"
    }
]

get_tx_identity_info

Uses:

  • hgvs/alignmentmapper.py
  • hgvs/validator.py
  • hgvs/normalizer.py
  • hgvs/utils/reftranscriptdata.py
  • hgvs/transcriptmapper.py

In [8]:
# Identity info for NM_001256272.1: in the result, alt_ac equals tx_ac and
# alt_aln_method is "transcript"; "lengths" is a list of integers.
pj(hdp.get_tx_identity_info("NM_001256272.1"))


{
    "alt_ac": "NM_001256272.1",
    "alt_aln_method": "transcript",
    "cds_end_i": 1189,
    "cds_start_i": 283,
    "hgnc": "VSX1",
    "lengths": [
        707,
        79,
        124,
        181,
        1134
    ],
    "tx_ac": "NM_001256272.1"
}

get_tx_info

Uses:

  • hgvs/alignmentmapper.py
  • hgvs/normalizer.py
  • hgvs/transcriptmapper.py

In [9]:
# Transcript info (CDS bounds and gene symbol in the output) for NM_001256272.1
# with respect to its "splign" alignment to NC_000020.10.
pj(hdp.get_tx_info("NM_001256272.1", "NC_000020.10", "splign"))


{
    "alt_ac": "NC_000020.10",
    "alt_aln_method": "splign",
    "cds_end_i": 1189,
    "cds_start_i": 283,
    "hgnc": "VSX1",
    "tx_ac": "NM_001256272.1"
}

get_tx_mapping_options

Uses:

  • hgvs/normalizer.py
  • hgvs/assemblymapper.py

In [10]:
# List the (alt_ac, alt_aln_method) pairs returned for transcript NM_001256272.1.
pj(hdp.get_tx_mapping_options("NM_001256272.1"))


[
    {
        "alt_ac": "NM_001256272.1",
        "alt_aln_method": "transcript/4d52a1d2",
        "tx_ac": "NM_001256272.1"
    },
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "blat",
        "tx_ac": "NM_001256272.1"
    },
    {
        "alt_ac": "NC_018931.2",
        "alt_aln_method": "splign",
        "tx_ac": "NM_001256272.1"
    },
    {
        "alt_ac": "NC_000020.11",
        "alt_aln_method": "splign",
        "tx_ac": "NM_001256272.1"
    },
    {
        "alt_ac": "NC_000020.10",
        "alt_aln_method": "splign",
        "tx_ac": "NM_001256272.1"
    },
    {
        "alt_ac": "AC_000152.1",
        "alt_aln_method": "splign",
        "tx_ac": "NM_001256272.1"
    }
]

get_similar_transcripts

Uses: Not used by hgvs


In [11]:
# Records pairing NM_001256272.1 (tx_ac1) with other transcripts (tx_ac2);
# each record carries *_eq flags that may be true, false, or null.
pj(hdp.get_similar_transcripts("NM_001256272.1"))


[
    {
        "cds_eq": false,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": false,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000398332"
    },
    {
        "cds_eq": null,
        "cds_es_fp_eq": null,
        "cds_exon_lengths_fp_eq": null,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000409958"
    },
    {
        "cds_eq": false,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": false,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000424574"
    },
    {
        "cds_eq": false,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": false,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000451258"
    },
    {
        "cds_eq": false,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": false,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "NM_199425.2"
    },
    {
        "cds_eq": false,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": false,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "NM_001256271.1"
    },
    {
        "cds_eq": false,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": false,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000376709"
    },
    {
        "cds_eq": false,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": false,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000444511"
    },
    {
        "cds_eq": null,
        "cds_es_fp_eq": null,
        "cds_exon_lengths_fp_eq": null,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000409285"
    },
    {
        "cds_eq": null,
        "cds_es_fp_eq": null,
        "cds_exon_lengths_fp_eq": null,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "NR_045951.1"
    },
    {
        "cds_eq": null,
        "cds_es_fp_eq": null,
        "cds_exon_lengths_fp_eq": null,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "NR_045948.1"
    },
    {
        "cds_eq": null,
        "cds_es_fp_eq": null,
        "cds_exon_lengths_fp_eq": null,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000557285"
    },
    {
        "cds_eq": false,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": false,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "NM_014588.5"
    },
    {
        "cds_eq": false,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": false,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000376707"
    },
    {
        "cds_eq": true,
        "cds_es_fp_eq": false,
        "cds_exon_lengths_fp_eq": true,
        "es_fp_eq": false,
        "hgnc_eq": true,
        "tx_ac1": "NM_001256272.1",
        "tx_ac2": "ENST00000429762"
    }
]

get_pro_ac_for_tx_ac

Uses:

  • hgvs/utils/reftranscriptdata.py

In [12]:
# Protein accession returned for transcript NM_001256272.1; left as a bare
# expression so the string is shown directly as the cell output.
hdp.get_pro_ac_for_tx_ac("NM_001256272.1")


Out[12]:
'NP_001243201.1'

In [ ]: